#
# @author Lei Shi
# Department of Information Systems 
# University of Maryland, Baltimore County
# 2012
# 
# Copyright(c) 2012 by Department of Information Systems, UMBC
# All Rights Reserved
# 
# Permission to use, copy and modify this software must be granted
# by the author and provided that the above copyright notice appear 
# in all relevant copies and that both that copyright notice and this 
# permission notice appear in all relevant supporting documentations.
# 
# Comments and additions may be sent the author at leishi1@umbc.edu
# 
#

library(MASS)


#library functions

"rho.biweight" <-
function(x,c){
   hulp<-x^2/2-x^4/(2*c^2)+x^6/(6*c^4)
   rho<-hulp*(abs(x)<c)+c^2/6*(abs(x)>=c)
   rho}

"sestck" <-
function(x,start,c,k,tol)
{
if (start>0) s<-start
else  
  {
  x <- as.matrix(x)
  x <- abs(x)
  n<-nrow(x)
  p<-ncol(x)
  x<-apply(x,2,sort)       
  if (floor(n/2)==n/2) s<-(x[n/2,]+x[(n+2)/2,])/2
  else s<-x[(n+1)/2,]
  s <- s/0.6745
  }
crit<-2*tol
rhoold<-mean(rho.biweight(x/s,c))-k
while (crit>=tol){
      delta<-rhoold/mean(psi.bisquare(x/s,c)*(x^2/(s^3)))
      isqu<-1
      okay<-0
      while((isqu<10)&(okay!=1)){
          rhonew<-mean(rho.biweight(x/(s+delta),c))-k
          if (abs(rhonew)<abs(rhoold)){
              s<-s+delta
              okay<-1
                           } 
          else {
              delta<-delta/2 
              isqu<-isqu+1
               }
       }
     if (isqu==10) crit<-0
     else crit<-(abs(rhoold)-abs(rhonew))/max(abs(rhonew),tol)
     rhoold<-rhonew
     }
scale<-abs(s)
scale
}

"slc" <-
function(x,nsamp=500,bdp=0.5)
{
Tbsb<-function(c,p){
ksiint<-function(cc,ss,pp){(2^ss)*gamma(ss+pp/2)*pgamma(cc^2/2,ss+pp/2)/gamma(pp/2)}
y1<-ksiint(c,1,p)*3/c-ksiint(c,2,p)*3/(c^3)+ksiint(c,3,p)/(c^5)
y2<-c*(1-pchisq(c^2,p))
res<-y1+y2
res 
}

if (!require(MASS))
stop("cannot load required library MASS")

if (is.null(dim(x))) x=as.matrix(x)                                                         
if (any(is.na(x))) stop("missing values are not allowed")                                   
if (bdp!=0.5 & bdp != 0.15 & bdp != 0.25) stop("bdp must be 0.15,0.25 or 0.5")              
n<-nrow(x)
p<-ncol(x)
if (p > n) stop("number of observations must be greater than number of variables")          
tol<-10^(-5)
s<-10^(11)

tbdp<- sqrt(qchisq(1-bdp,p))
maxit<-1000
eps<-10^(-8)
diff<-10^6
ctest<-tbdp
iter<-1
while ((diff>eps)& (iter<maxit)) 
  {
  cold<-ctest
  ctest<-Tbsb(cold,p)/bdp
  diff<-abs(cold-ctest)
  eter<-iter+1
  }
c<-ctest

k<-(c/6)*Tbsb(c,p)
la<-1

for (i in 1:nsamp)
  {    # global improvement
  ranset<- sample(1:n,p+1)                                                                  
  xj<-as.matrix(x[ranset,])                                                                 
  mu<-apply(xj,2,mean)
  cov<-var(xj)*(nrow(xj)-1)/nrow(xj)
  determ<-det(cov)
  if ((determ>10^(-15))&(determ^(1/p)>10^(-5))) 
    {
    cov<-determ^(-1/p)*cov
    if (i>ceiling(nsamp/5)) 
      {
      if (i==ceiling(nsamp/2)) la<-2
      if (i==ceiling(nsamp*.8)) la<-4
      random<- runif(1) 
      random<-random^la
      mu<-random*mu+(1-random)*muopt                                                      
      cov<-random*cov+(1-random)*covopt
      determ<-det(cov)
      cov<-determ^(-1/p)*cov
      }
    md<-mahalanobis(x,mu,cov,inverted = FALSE, tol.inv =.Machine$double.eps)
    md<-md^(1/2)
    if (mean(rho.biweight(md/s,c))<k) 
      {
      if (s<5*10^10) s<-sestck(md,s,c,k,tol)
      else s<-sestck(md,0,c,k,tol)
      muopt<-mu
      covopt<-cov
      mdopt<-md                                                                            
      psi<-psi.bisquare(md,s*c)*md
      u<-psi.bisquare(md,s*c)
      ubig<-matrix(t(u),nrow=length(u),ncol=p,byrow=FALSE)
      aux<-(ubig*x)/mean(u)
      mu<-apply(aux,2,mean)
      xcenter<-t(t(x)-mu)
      cov<-t(ubig*xcenter)%*%xcenter
      cov<-det(cov)^(-1/p)*cov
      okay<-0
      jj<-1
      while ((jj<3)&(okay!=1)) 
        {
        jj<-jj+1
        md<-mahalanobis(x,mu,cov,tol.inv =.Machine$double.eps)
        md<-md^(1/2)
        if (mean(rho.biweight(md/s,c))<k)
          {
          muopt<-mu
          covopt <-cov
          mdopt <-md
          okay<-1
          if (s<5*10^10) s<-sestck(md,s,c,k,tol)
          else s<-sestck(md,0,c,k,tol)
          }
        else 
          {
          mu<-(mu+muopt)/2
          cov<-determ^(-1/p)*(cov+covopt)/2
          }
        }
      }
    } 
  }
res.mean<-muopt
res.covariance<-covopt*s^2
res.distances<-mdopt/s
res.scale<-s
list(location=res.mean,covariance=res.covariance,distances=res.distances,scale=res.scale,c=c)
}


#continuous
x <- mvrnorm(400,c(0,0,0),matrix(c(1,0,0,0,1,0.5,0,0.5,1),ncol=3))
print(apply(x,2,mean))
print(cov(x))
y <- mvrnorm(100,c(4,4,4),matrix(c(1,0,0,0,1,0.5,0,0.5,1),ncol=3))
x<- rbind(x,y)

results1<-slc(x,nsamp=100,bdp=0.5)
z<-results1$distances
y1 <- 1:500
plot(y1, z, main="", ylab="z")

md<-mahalanobis(x,apply(x,2,mean),cov(x),inverted = FALSE, tol.inv =.Machine$double.eps)
y1 <- 1:500
plot(y1, md, main="", ylab="z")
par(mfrow=c(2,1),mar=c(5,4,2,1)) 
plot(y1, z, main="Robust Mahalanobis Distance", xlab="Point No.", ylab="Mahalanobis Dist.")


#Discrete

x <- rmultinom(400, size = 8, prob=c(0.3,0.2,0.3))
x <- t(x)

y <- rmultinom(100, size = 14, prob=c(0.3,0.2,0.3))
y <- t(y)
x<- rbind(x,y)

results1<-slc(x,nsamp=100,bdp=0.5)

print(results1)
z<-results1$distances
y1 <- 1:500
plot(y1, z, main="", ylab="z")

md<-mahalanobis(x,apply(x,2,mean),cov(x),inverted = TRUE, tol.inv =.Machine$double.eps)
y1 <- 1:500

par(mfrow=c(2,1),mar=c(5,4,2,1)) 
plot(y1, z, main="Robust Mahalanobis Distance", xlab="Point No.", ylab="Mahalanobis Dist.")
plot(y1, md, main="Non-robust Mahalanobis Distance", xlab="Point No.", ylab="Mahalanobis Dist.")
